# Load the geocoded Shinkansen station table (path is relative to the
# working directory).
data <- read.csv(file = "01_data_cleaning/shinkansen_stations_geocoded.csv")
Count the number of stations on each Shinkansen_Line operated by each
company. Note: there are 7 intersection stations that belong to two
lines, and each of them is counted once for every line it is on (i.e.
twice in total). To do this, we split the string in the Shinkansen_Line
column on the comma whenever it contains two Shinkansen line names, and
then unnest the data so that each station-line pair gets its own row.
# Expand stations that list several lines (comma-separated) into one row per
# station/line pair, then normalise the line names: hyphens become
# underscores and the "Shinknsen" misspelling is corrected.
dat <- data %>%
  mutate(Line_name = strsplit(Shinkansen_Line, ",")) %>%
  tidyr::unnest(Line_name) %>%
  # Drop the original combined column and let the split one take its name.
  select(-Shinkansen_Line) %>%
  rename(Shinkansen_Line = Line_name) %>%
  mutate(
    Shinkansen_Line = gsub("-", "_", Shinkansen_Line),
    Shinkansen_Line = gsub("Shinknsen", "Shinkansen", Shinkansen_Line)
  )
# Row counts per (Company, Shinkansen_Line) in `n`, plus `total_n`: the sum of
# `n` over all lines of the same company, repeated on each of its rows.
# The result is returned ungrouped.
cnt_by_company <- ungroup(
  mutate(
    group_by(count(dat, Company, Shinkansen_Line), Company),
    total_n = sum(n)
  )
)
# Build one plotly annotation per company that prints the company's total
# (`total_n`) just above its stacked bar.
#
# `total_n` is constant within a company, so the first row of each company is
# enough. Taking company and total from the SAME row keeps them paired;
# indexing `unique(Company)` and `unique(total_n)` separately would misalign
# (or run out of bounds) as soon as two companies share the same total.
company_totals <- cnt_by_company[!duplicated(cnt_by_company$Company), ]

# seq_len() yields an empty sequence for zero companies, so an empty input
# produces an empty annotation list instead of an indexing error.
annotations <- lapply(
  seq_len(nrow(company_totals)),
  function(i) {
    list(
      x = company_totals$Company[[i]],
      y = company_totals$total_n[[i]],
      text = company_totals$total_n[[i]],
      yanchor = "bottom", # anchor the label's bottom edge at the bar top
      showarrow = FALSE
    )
  }
)
# Stacked bar chart: one bar per company, one coloured segment per
# Shinkansen line, with the per-company totals from `annotations` drawn on
# top of each bar. Hovering a segment shows its count and line name.
plot_ly(
  cnt_by_company,
  x = ~Company,
  y = ~n,
  color = ~Shinkansen_Line,
  colors = "Spectral",
  hoverinfo = "text",
  text = ~ paste(n, "stations in", Shinkansen_Line)
) %>%
  add_bars() %>%
  layout(
    title = "Number of Shinkansen Line by Company",
    barmode = "stack",
    xaxis = list(title = "Company"),
    yaxis = list(title = "Number of Shinkansen Line"),
    annotations = annotations
  )
# Colour interpolation function used to colour the per-company boxes.
colramp <- colorRamp(
  c("darkblue", "forestgreen", "red", "darkred", "orange")
)

# Box plot of each station's distance from Tokyo Station, one box per company.
# The legend is hidden on this panel.
boxp_company <- plot_ly(
  dat,
  y = ~Distance.from.Tokyo.st,
  color = ~Company,
  colors = colramp,
  type = "box",
  showlegend = FALSE
) %>%
  layout(
    title = "Boxplots of Distance from Tokyo Station by Company",
    xaxis = list(title = "Company")
  )
# Box plot of each station's distance from Tokyo Station, one box per
# Shinkansen line. The "_Shinkansen" suffix is stripped from the line names
# to get shorter labels.
# NOTE(review): no `colors` is supplied here, so plotly picks its default
# palette; the "Set2 ... n too large" warnings recorded after the subplot
# call presumably originate from this panel -- confirm.
boxp_line <- plot_ly(
  mutate(dat, Line = str_replace_all(Shinkansen_Line, "_Shinkansen", "")),
  y = ~Distance.from.Tokyo.st,
  color = ~Line,
  type = "box",
  showlegend = FALSE
) %>%
  layout(
    title = "Boxplots of Distance from Tokyo Station by Shinkansen Line",
    xaxis = list(title = "Shinkansen Line")
  )
# Show the two box plots side by side in a single row, keeping each panel's
# x-axis title, under one shared title and a light background.
subplot(
  boxp_company,
  boxp_line,
  nrows = 1,
  titleX = TRUE,
  margin = 0.03
) %>%
  layout(
    plot_bgcolor = "#e5ecf6",
    title = "Boxplots of Distance from Tokyo Station by Company and Shinkansen Line"
  )
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors